Hands-on R training: REGACC Workflows IV

Overview

The usual workflow consists, with some variations, of the following steps:

  • Find the files with the data

  • Load the data into R and tidy it

  • Do some calculations / charts

  • Produce an output (normally a file)

ggplot2

There are three basic components of a ggplot chart

  • A data frame containing the data

  • aesthetics or roles assigned to particular variables in the data frame.

  • A geometric object which is what you are plotting.

data

library(tidyverse)
# Read the "data" sheet of the publication workbook and reshape it to long
# format: every numeric column becomes one (time_period, obs_value) pair.
tmp <- readxl::read_xlsx(
  path = "data/df_publication_2023.xlsx",
  sheet = "data"
) %>%
  pivot_longer(
    cols = where(is.numeric),
    names_to = "time_period",
    values_to = "obs_value"
  ) %>%
  mutate(
    # Column names come back as text after pivoting; store them as integers.
    time_period = as.integer(time_period),
    NUTS = as.factor(NUTS),
    obs_value = as.numeric(obs_value)
  )

# Subset used by all the charts below: country "EL", vintage "V2023",
# unit "PPS_HAB_EU27".
tmp1 <- filter(
  tmp,
  Country == "EL",
  vintage == "V2023",
  unit == "PPS_HAB_EU27"
)

data

# Data only: no aesthetics and no geom yet, so the result is an empty panel.
ggplot(data = tmp1)

aesthetics

# Data + aesthetics: the axes are set up from the mapped variables,
# but nothing is drawn because no geom has been added.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
)

geom

# Data + aesthetics + geom: one point per observation.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  geom_point()

geom

# Same mapping drawn as a line. No grouping aesthetic is set here, so all
# rows are joined into one single line.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  geom_line()

geom + aes

# Adding `group = geo` draws one separate line per value of `geo`.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value, group = geo)
) +
  geom_line()

geom + aes

# Heat map: one tile per (time_period, geo) cell, filled by the value,
# with the value also printed on top of each tile.
ggplot(
  data = tmp1,
  mapping = aes(
    x = time_period,
    y = geo,
    fill = obs_value,
    label = obs_value
  )
) +
  geom_tile() +
  geom_text()

geom + aes

# Two extra aesthetics on the same line chart: colour identifies `geo`
# and line type identifies `NUTS`.
ggplot(
  data = tmp1,
  mapping = aes(
    x = time_period,
    y = obs_value,
    colour = geo,
    linetype = NUTS
  )
) +
  geom_line()

geom + aes

# Bar chart for a single year (2021). The fill aesthetic is set on the
# layer rather than on the whole plot.
ggplot(
  data = filter(tmp1, time_period == 2021),
  mapping = aes(x = geo, y = obs_value)
) +
  geom_col(mapping = aes(fill = NUTS))

geom + aes

# Same bar chart with the bars sorted by value (reorder()), narrower
# bars (width) and semi-transparent fill (alpha).
ggplot(
  data = filter(tmp1, time_period == 2021),
  mapping = aes(x = reorder(geo, obs_value), y = obs_value)
) +
  geom_col(
    mapping = aes(fill = NUTS),
    alpha = 0.5,
    width = 0.5
  )

geom + aes

# Horizontal version: the sorted category goes on y and the value on x.
ggplot(
  data = filter(tmp1, time_period == 2021),
  mapping = aes(x = obs_value, y = reorder(geo, obs_value))
) +
  geom_col(
    mapping = aes(fill = NUTS),
    alpha = 0.5,
    width = 0.5
  )

geom_*

# Names of all functions in the attached ggplot2 package that start with "geom_".
grep("^geom_", lsf.str("package:ggplot2"), value = TRUE)
 [1] "geom_abline"            "geom_area"              "geom_bar"              
 [4] "geom_bin_2d"            "geom_bin2d"             "geom_blank"            
 [7] "geom_boxplot"           "geom_col"               "geom_contour"          
[10] "geom_contour_filled"    "geom_count"             "geom_crossbar"         
[13] "geom_curve"             "geom_density"           "geom_density_2d"       
[16] "geom_density_2d_filled" "geom_density2d"         "geom_density2d_filled" 
[19] "geom_dotplot"           "geom_errorbar"          "geom_errorbarh"        
[22] "geom_freqpoly"          "geom_function"          "geom_hex"              
[25] "geom_histogram"         "geom_hline"             "geom_jitter"           
[28] "geom_label"             "geom_line"              "geom_linerange"        
[31] "geom_map"               "geom_path"              "geom_point"            
[34] "geom_pointrange"        "geom_polygon"           "geom_qq"               
[37] "geom_qq_line"           "geom_quantile"          "geom_raster"           
[40] "geom_rect"              "geom_ribbon"            "geom_rug"              
[43] "geom_segment"           "geom_sf"                "geom_sf_label"         
[46] "geom_sf_text"           "geom_smooth"            "geom_spoke"            
[49] "geom_step"              "geom_text"              "geom_tile"             
[52] "geom_violin"            "geom_vline"            

Default themes

# A complete built-in theme is added like any other component.
ggplot(
  data = tmp1,
  mapping = aes(
    x = time_period,
    y = obs_value,
    colour = geo,
    linetype = NUTS
  )
) +
  geom_line() +
  theme_minimal()

Facets and scales

# Small multiples: one panel per `geo`, all sharing the same axes, with a
# horizontal reference line at 100.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  geom_line(colour = "darkblue") +
  geom_hline(yintercept = 100, colour = "darkred") +
  theme_light() +
  facet_wrap(~geo)

Facets and scales

# Same small multiples, but each panel gets its own y-axis range
# (scales = "free_y").
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  geom_line(colour = "darkblue") +
  geom_hline(yintercept = 100, colour = "darkred") +
  theme_classic() +
  facet_wrap(~geo, scales = "free_y")

Facets and scales

# Shared axes again, now with a customised x scale: only two labelled
# breaks and no padding at either end of the axis.
ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  geom_line(colour = "darkblue") +
  geom_hline(yintercept = 100, colour = "darkred") +
  scale_x_continuous(breaks = c(2005, 2015), expand = c(0, 0)) +
  theme_classic() +
  facet_wrap(~geo)

Colour palettes (also scales)

# Heat map stored in `p` so that later snippets can keep adding to it.
# fct_rev() reverses the order of `geo` on the y axis; the viridis scale
# colours the tiles; expand = c(0, 0) removes the padding around the tiles.
p <- ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = fct_rev(geo), fill = obs_value)
) +
  geom_tile() +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_fill_viridis_c() +
  theme_light()
p

Theme elements

We can control almost every element of the chart (colours, grid lines, ticks, axes) with theme(). To suppress an element, we set it to element_blank().

Theme elements

# Remove the axis ticks, the axis titles and the legend title from `p`.
p <- p +
  theme(
    legend.title = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

p

Text elements

We can control text elements with element_text(). Titles, subtitles, captions and tags are set with labs(); lines (axis lines, gridlines…) are controlled with element_line(); and rectangular elements (plot area, legend box…) with element_rect().

element_*

# Add the chart texts with labs(), then style them with element_text():
# y-axis labels, legend placement and a larger, bold title with extra margin.
p <- p +
  labs(
    tag = "Figure 1",
    title = "Greece NUTS 2 regions GDP per capita",
    subtitle = " In PPS as % of the EU27 average",
    caption = "Source: Eurostat"
  ) +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 11, colour = "grey20"),
    plot.title = element_text(
      face = "bold",
      size = rel(1.6), # relative to the theme's base text size
      colour = "grey10",
      margin = margin(t = 12, r = 0, b = 8, l = 0)
    )
  )

p

Custom themes

# Copy the facetting variable under a second name. The background layer
# drops `geo`, so its data is not split by facet_wrap(~geo) and every panel
# shows all regions in grey, grouped by `geo2`.
tmp1 <- mutate(tmp1, geo2 = geo)

ggplot(
  data = tmp1,
  mapping = aes(x = time_period, y = obs_value)
) +
  # Background: all regions in light grey, repeated in every panel.
  geom_line(
    data = select(tmp1, -geo),
    mapping = aes(x = time_period, y = obs_value, group = geo2),
    colour = "grey80"
  ) +
  # Foreground: the panel's own region, highlighted.
  geom_line(colour = "darkblue", linewidth = 1) +
  facet_wrap(~geo) +
  scale_x_continuous(breaks = c(2005, 2015), expand = c(0, 0)) +
  scale_y_continuous(breaks = c(40, 80, 120)) +
  # The custom theme must come before the theme() tweak that modifies it.
  regacc::theme_ra() +
  theme(panel.grid.major = element_blank())

Comment order

Adding interactivity

We can add some basic (and more advanced) interactivity with plotly::ggplotly(), although, as we will see later, we will use {shiny} for this.

# Convert the static ggplot stored in `p` into an interactive plotly widget.
library(plotly)
ggplotly(p = p)

Adding interactivity

# Key the data on `geo` so that selecting one line highlights all the
# observations that share the same `geo` value.
tx <- highlight_key(tmp1, ~geo)

# One black line per region. The grouping only needs to be declared once,
# so the chart is built in a single pipeline instead of assigning `base`
# twice with a repeated group_by().
base <- plot_ly(tx, color = I("black")) %>%
  group_by(geo) %>%
  add_lines(x = ~time_period, y = ~obs_value)

# Click a line to highlight it. `selectize` adds a search box for choosing
# regions, `dynamic` adds a colour picker for the highlight, and `persistent`
# keeps earlier selections when a new one is made.
highlight(
  base,
  on = "plotly_click",
  selectize = TRUE,
  dynamic = TRUE,
  persistent = TRUE
)

Adding interactivity

Shiny

  • {shiny} is a package that makes it easy to build interactive web apps with R. It consists of a user interface where the user can interact and a server module that executes the options chosen by the user and updates the output.

  • We will see a small example in the script shiny.R